############################################################################
############################################################################
## This file produces all figures related to simulation study in
## Ratkovic and Tingley (2016).  It generates the loads required packages,
## generates simulated data, analyzes the data using a variety of different methods,
## gathers the data and captures summary statistics, and then saves output.
############################################################################
############################################################################

library(glmnet)
library(sparsereg)
library(coda)
library(arm)
library(monomvn)
library(rstan)
library(plyr)
library(dplyr)
library(reshape2)
library(leaps)
source('BCH_LassoPlusOLS.R')



############################################################################
############################################################################
## Rstan code for function to run horseshoe.  Saved rstan object is already
## in the directory and loaded.  rstanarm can be used to fit sparse models with 
## the horseshoe prior, but it does not return parameters (theta) necessary for selecting
## the variables.
############################################################################
############################################################################

if(file.exists("stan.hs")){
	load("stan.hs")
	}else {
stan.hs.code = "
  data {
    int<lower=1> J; 
    int<lower=1> N; 
    vector[N] y; 
    matrix [N, J]  X; // Model Matrix 
  }
  parameters {
    vector[J] theta_step; 
    vector[J] lambda_step; 
    real tau;
	real<lower=0.00000001> sigma; // error scale
  }
 transformed parameters {
   vector[J] theta; 
   vector[J] lambda; 
    lambda = lambda_step * tau;
    theta = (theta_step .* lambda_step) * tau;
  }
  
  model {
    tau ~ cauchy(0, 1.0/J);
    lambda_step ~ cauchy(0, 1);
    theta_step ~ normal(0, 1);
    sigma ~ cauchy(0, 5); 
    y ~ normal(X*theta, sigma);
  }  
  
"
stan.hs.fit = stan_model(model_code=stan.hs.code, model_name="hs")

save(stan.hs.fit,file="stan.hs")
}



############################################################################
############################################################################
## Generate vectors with sample size, number of covariates, 
## number of bootstrap runs, and random number used to save results.
## The command for saving output is in line 474.  Path can be changed.
############################################################################
############################################################################
	n.run<-c(50,100,250,500,1000,2500)
	k.run<-c(10,25,50,75,100)
	ci.number<-200
	ran.num<-round(1e8*runif(1))
	


##Big loop....
for(i.k in 1:length(k.run)){
for(i.n in 1:length(n.run)){

	##Set n, k
	n<-n.run[i.n]
	k<-k.run[i.k]

############################################################################
############################################################################
## Generate outcome and independent variables
############################################################################
############################################################################

	##Treatments
	T1<-factor(sample(letters[1:2],n,r=T))
	T2<-factor(sample(letters[1:3],n,r=T))
	T3<-factor(sample(letters[1:4],n,r=T))
	treat<-data.frame(T1,T2,T3)

	##X matrix of covariates
	X<-matrix(rnorm(n*k),nr=n)
	cov.mat<-matrix(rnorm(k^2),nr=k)
	cov.mat<-t(cov.mat)%*%cov.mat
	X<-X%*%cov.mat
	X<-cbind(1,X)	
	y<-rep(0,n)
	

	##Make interations b/w X and treatment
	all.vars<-make.inter(X,treat)
	X.big<-all.vars$X
	
	##Trim columns that correlate above 0.999^.5 with others
	X.big<-apply(X.big,2,FUN=function(x) x-mean(x))
	keeps<-apply(X.big,2,sd)>0&(apply(X.big,2,FUN=function(x) sum(is.na(x)))==0)
	X.big<-X.big[,keeps]

	cor1<-cor(X.big)
	diag(cor1)<-0
	cors.run<-NULL
for(i in 1:(ncol(X.big)-1)) for(j in (i+1):ncol(X.big)){
	if(cor1[i,j]^2>0.999) cors.run<-rbind(cors.run,c(i,j))
	}

	if(length(cors.run)>0){
	drops<-as.vector(cors.run[,2])
	X.big<-X.big[,-drops]
	}
	
	##Standardize covariate matrix which includes interactions: X.big
	X.big<-apply(X.big,2,FUN=function(x) x-mean(x))
	X.big<-apply(X.big,2,FUN=function(x) x/sd(x))

	##Drop baseline value T1==a; perfectly linearly dependent with T_1==b
	drop.base<-grep("_a",colnames(X.big))
	drop.base<-drop.base[!drop.base%in%grep("T1",colnames(X.big))]
	X.big<-X.big[,-drop.base]
	
	##Generate outcome, true fitted values, and errors
    beta.true<-rep(0,ncol(X.big))
    names(beta.true)<-colnames(X.big)
	#X main effects
    beta.true[c("X2","X3","X4")]<-9
    beta.true[c("T2_b","T3_b","T3_c")]<-6
    beta.true[c("X2 x T2_b","X2 x T3_c","X2 x T1_a", "X4 x T1_a", "X3 x T3_b", "X4 x T3_c" )]<-3
    beta.true.2<-beta.true
    
	fit.true<-X.big%*%beta.true.2
	fit.true<-fit.true-mean(fit.true)

	errs<-rt(n,df=5)
	errs2<-errs/sd(errs)*sd(fit.true)
	y<-X.big%*%beta.true+errs2
	y<-as.vector(y)
	y<-y-mean(y)




############################################################################
############################################################################
## Fit sparsereg.  s1, s2, s3 were from earlier runs, now deprecated
############################################################################
############################################################################

	##Fit sparsereg
	pt0<-proc.time()
	s3<-sparsereg(y,X.big,scale.type="none")
	pt1<-proc.time()
	s1<-s2<-s3

	## Gather some of the results.  Saved to multiple objects due to 
	## earlier versions of code that were still developing the algorithm.
	lm.coefs.b<-lm.coefs<-beta.post.mean.3<-beta.post.mean.2<-beta.post.mean<-colMeans(s1$beta.mean)
	beta.post.mode.3<-beta.post.mode.2<-beta.post.mode<-apply(s1$beta.mode,2,median)
	lm.int<-s1$beta.mean

############################################################################
############################################################################
## Fit post-LASSO OLS of Belloni and Chernozhukov.
############################################################################
############################################################################

bch1<-bch_lassoplus(X.big,y)
	bch.int<-matrix(NA,nr=length(bch1$bch.postlasso),nc=ci.number)
	for(i.int in 1:ci.number) {
		boot.samp<-sample(1:n,n,TRUE)
		bch.int[,i.int]<-bch_lassoplus(X.big[boot.samp,],y[boot.samp])$bch.post
		}

############################################################################
############################################################################
## Fit model with horseshoe prior
############################################################################
############################################################################


	##Fit horseshoe model
	pt.h0<-proc.time()
	n.chains<-1
	n.iters<-1000
	test.data = list('J'=ncol(X.big),'y'=as.vector(y),'N'=length(y),'X'=X.big)
	hs.fit = sampling(stan.hs.fit, 
                            data = test.data, 
                            iter = n.iters,
                            pars = c("theta", "lambda", "tau","sigma"),
                            chains = 1)
	beta.hs = extract(hs.fit, pars=c("theta"), permuted=TRUE)[[1]]               
	hs.coef.mean<-apply(beta.hs,2,mean)
	tau.hs =  extract(hs.fit, pars=c("lambda"), permuted=TRUE)[[1]] 
	hs.coef.mode<-hs.coef.mean
	hs.coef.mode[abs(colMeans(tau.hs))<1]<-0


	##Horseshoe plus didn't work well, not used in paper
	##Just write horseshoe results over it.
	hsp.fit<-hs.fit##Horseshoeplus didn't work well; don't use
	beta.hsp = extract(hsp.fit, pars=c("theta"), permuted=TRUE)[[1]]               
	hsp.coef.mean<-apply(beta.hsp,2,mean)
	tau.hsp =  extract(hsp.fit, pars=c("lambda"), permuted=TRUE)[[1]] 
	hsp.coef.mode<-hsp.coef.mean
	hsp.coef.mode[abs(colMeans(tau.hsp))<1]<-0
	pt.h1<-proc.time()

	hs1<-hs2<-NULL

	hs1$beta<-beta.hs
	hs2$beta<-beta.hsp


	##Estimates from horseshoe
	hs.coef<-hs.coef.mode
	hs.mean<-hs.coef.mean
	hs2.coef<-hsp.coef.mode
	hs2.mean<-hsp.coef.mean

############################################################################
############################################################################
## Fit LASSO and adaptive LASSO
############################################################################
############################################################################

	##Function to fit glmnet with weights--used for perturbing/bootstrapping
	##Note--weights all of 1 returns just the lasso with BIC selection
	##The vanilla BIC given below outperformed other statistics discussed in 
	##the paper as well as cross-validation.
	fit.glmnet<-function(wts){
	glmnet2<-glmnet(X.big,y,weights=wts,alpha=1)
		BIC.func<-function(i.BIC){
			n<-length(y)
			beta.curr<-glmnet2$beta[,i.BIC]
			RSS<-sum((y-X.big%*%beta.curr)^2)/(n-sum(beta.curr!=0))
			if(RSS<1e-20)RSS<-1e20
			n*log(RSS)+(log(length(y)))*sum(beta.curr!=0)
		}
	BIC.out<-sapply(1:length(glmnet2$lam),BIC.func)
	glmnet2$beta[,BIC.out==min(BIC.out)]
	}


	##Get first stage estimates for adaptive lasso using ridge.
	pt.g0<-proc.time()	
	glmnet.ridge<-glmnet(X.big,y,alpha=0)
	cv.alasso<-cv.glmnet(X.big,y,alpha=0)
	alasso1.coefs<-abs(predict(glmnet.ridge,newx=X.big,s=cv.alasso$lambda.min,alpha=0,type="coef"))[-1]
	
	##Fit glmnet using BIC then perturb via method described in paper
	glmnet.coefs<-fit.glmnet(rep(1,n))
	glmnet.int<-matrix(NA,nr=length(glmnet.coefs),nc=ci.number)
	for(i.int in 1:ci.number) glmnet.int[,i.int]<-fit.glmnet(rexp(n))
	pt.g1<-proc.time()

	##Fit adaptive lasso--get adaptive weights
	pt.a0<-proc.time()
	w1<-alasso1.pen<-1/abs(alasso1.coefs)
	alasso1.pen<-w1/mean(w1)
	sigma.sq<-mean((y-X.big%*%alasso1.coefs)^2)

	##Function to fit adaptive lasso; pick via BIC; see above
	fit.alasso2<-function(wts){
	glmnet2<-glmnet(X.big,y,weights=wts,alpha=1,penalty.factor=alasso1.pen)
		BIC.func<-function(i.BIC){
			beta.curr<-glmnet2$beta[,i.BIC]	
			RSS<-sum((y-X.big%*%beta.curr)^2)/(n-sum(beta.curr!=0))
			if(RSS<1e-20)RSS<-1e-20
			n*log(RSS)+(log(length(y)))*sum(beta.curr!=0)
				}
	BIC.out<-sapply(1:length(glmnet2$lam),BIC.func)
	glmnet2$beta[,BIC.out==min(BIC.out)]
	}

	##Fit adaptive lasso then perturbation method
	alasso.coefs<-fit.alasso2(rep(1,n))
	alasso.int<-matrix(NA,nr=length(glmnet.coefs),nc=ci.number)
	for(i.int in 1:ci.number) alasso.int[,i.int]<-fit.alasso2(rexp(n))
	pt.a1<-proc.time()

	##Just filling in bootstrap objects
	alasso.boot<-glmnet.boot<-glmnet.int

############################################################################
############################################################################
## Summarizing results: coverage
############################################################################
############################################################################

	##Function for gathering coverage statistics.
	make.int<-function(coefs,ests,alpha=.1){
	intout<-cbind(beta.true,HPDinterval(as.mcmc(coefs),pr=1-alpha))
	intout<-cbind(intout, ((intout[,1]-intout[,2])>=0)*((intout[,3]-intout[,1])>=0 )*1,ests)
	}
	
	##Bonferroni correction with 0.1 for alpha.
	df1<-function(x) .1/max(sum(x!=0),1)
	
	##Apply to coefficient estimates
	beta.mean.cover<-make.int(s1$beta.mean,beta.post.mode,df1(beta.post.mode))
	beta.ci.cover<-make.int(s1$beta.ci,beta.post.mode,df1(beta.post.mode))
	beta.mean.cover.2<-make.int(s2$beta.mean,beta.post.mode.2,df1(beta.post.mode.2))
	beta.ci.cover.2<-make.int(s2$beta.ci,beta.post.mode.2,df1(beta.post.mode.2))
	beta.mean.cover.3<-make.int(s3$beta.mean,beta.post.mode.3,df1(beta.post.mode.3))
	beta.ci.cover.3<-make.int(s3$beta.ci,beta.post.mode.3,df1(beta.post.mode.3))
	glmnet.cover<-make.int(t(glmnet.int),glmnet.coefs,df1(glmnet.coefs))
	alasso.cover<-make.int(t(alasso.int),alasso.coefs,df1(alasso.coefs))
	glmnet.cover.boot<-make.int(t(glmnet.boot),glmnet.coefs,df1(glmnet.coefs))
	alasso.cover.boot<-make.int(t(alasso.boot),alasso.coefs,df1(alasso.coefs))
	lmb.cover.boot<-make.int((lm.int),lm.coefs.b,df1(lm.coefs.b))
	bhs.cover<-make.int((hs1$beta),hs.coef,df1(hs.coef))
	bhs.mean.cover<-make.int((hs1$beta),hs.mean,df1(hs.mean))
	blasso.cover<-make.int((hs2$beta),hs.coef,df1(hs.coef))
	blasso.mean.cover<-make.int((hs2$beta),hs2.mean,df1(hs.mean))
	bch.cover<-make.int(t(bch.int),bch1$bch.post,df1(bch1$post))

	##Gather list of coverage and time
	list.cover<-list(beta.ci.cover,beta.mean.cover,beta.ci.cover.2,beta.mean.cover.2, beta.ci.cover.3, beta.mean.cover.3,  bhs.cover, bhs.mean.cover, blasso.cover, blasso.mean.cover,glmnet.cover, alasso.cover, glmnet.cover.boot, alasso.cover.boot, lmb.cover.boot,bch.cover)
	time.all<-c(rep(pt1[3]-pt0[3],6),rep(pt.h1[3]-pt.h0[3],4), 
pt.g1[3]-pt.g0[3],pt.a1[3]-pt.a0[3],pt.g1[3]-pt.g0[3],pt.a1[3]-pt.a0[3],0,0)

	getcover<-function(x) c(tapply(x[,4],beta.true,mean),mean(x[beta.true!=0,4]),mean(x[,4]),
	sum(x[,4]*(x[,5]!=0)),sum(x[,5]!=0))
	
	##Summary of coverage statistics
	cover.summ<-matrix(unlist(lapply(list.cover,getcover)),nc=length(list.cover))
	cover.summ<-rbind(cover.summ,time.all)
	rownames(cover.summ)<-c("cover0","cover3","cover6","cover9","covernz","covertot","coverdisc","totdisc","time")


############################################################################
############################################################################
## Gather remaining results, FP, FN, FDR, etc.
############################################################################
############################################################################

	##All coefficient estimates
	coefs.all<-cbind(beta.post.mode, beta.post.mean, beta.post.mode.2,beta.post.mean.2,beta.post.mode.3,beta.post.mean.3, hs.coef,hs.mean,hs2.coef, hs2.mean,glmnet.coefs,alasso.coefs,glmnet.coefs,alasso.coefs,lm.coefs.b,bch1$bch.post)
	
	##Functions to apply to coefficients.
	falsepos<-function(b) sum(beta.true==0 & b!=0)
	falseneg<-function(b) sum(beta.true!=0 & b==0)
	fdr<-function(b) ifelse(sum(b!=0)>0,mean(beta.true[b!=0]==0),0)
	wrongsign<-function(b) sum((beta.true*b)<0)
	signagree<-function(b) tapply(sign(b)==sign(beta.true),beta.true,sum)
	Rsq.check<-function(b,X=X.big){
		mean((fit.true-X.big%*%b)^2)
	}
	
	##Focus on interaction terms.
	inters<-grep(" x ",colnames(X.big))
	inter.vec<-rep(FALSE,ncol(X.big))
	inter.vec[inters]<-TRUE
	trueinters<-function(x) {
		cond1<-beta.true[inters]!=0
		cond2<-sign(x[inters])==sign(beta.true[inters])
		sum(cond1*cond2)
		}
	falseinters<-function(x){
		cond1<-beta.true[inters]==0
		cond2<-sign(x[inters])!=0
		sum(cond1*cond2)
		}
	
	rmse.inters<-function(x){
		cond1<-beta.true[inters]!=0
		cond2<-((x-beta.true)[inters])^2
		sum(cond1*cond2)/sum(cond1)
	}
	
	##Coverage statistics.
	zdcover<-function(x){
	sum(x[inters,4]*(x[inters,5]!=0)*(beta.true[inters]==0))

	}

	nzdcover<-function(x){
	sum(x[inters,4]*(x[inters,5]!=0)*(beta.true[inters]!=0))

	}
	disccover<-function(x){
	sum(x[,4]*(x[,5]!=0))

	}


	discintercover<-function(x){
	sum(x[inters,4]*(x[inters,5]!=0))

	}	


	##RMSE statistics
	rmse.all<-function(x) mean((x-beta.true)^2)^.5
	rmse.zero<-function(x) mean((x-beta.true)[beta.true==0]^2)^.5
	rmse.nz<-function(x) mean((x-beta.true)[beta.true!=0]^2)^.5
	
	##Full output of summary
	summary.out<-rbind(n,k,ncol(X.big),
	apply(coefs.all,2,signagree),
	apply(coefs.all,2,Rsq.check),
	apply(coefs.all,2,falsepos),
	apply(coefs.all,2,falseneg),
	apply(coefs.all,2,fdr),
	apply(coefs.all,2,trueinters),
	apply(coefs.all,2,falseinters),
	apply(coefs.all,2,rmse.zero),
	apply(coefs.all,2,rmse.nz),
	apply(coefs.all,2,rmse.all),
	apply(coefs.all,2,rmse.inters),
	apply(coefs.all[beta.true==0,],2,sd),
	unlist(lapply(list.cover,disccover)),
	unlist(lapply(list.cover,zdcover)),
	unlist(lapply(list.cover,nzdcover)),
	unlist(lapply(list.cover,discintercover)),
 	sd(y-X.big%*%beta.true)
		)
	rownames(summary.out)<-c("n","k","p","agree0","agree3","agree6","agree9","Rsq","falsepos","falseneg","fdr","trueinters","falseinters","rmse.zero","rmse.nz","rmse.all","rmse.inters","sd0",
	"intercover","zintercover","nzintercover","discintercover","errorvar")

	##Combine with coverage summary from above
	summary.out<-rbind(summary.out,cover.summ)
	##Monitor where we are in loop
	print(c(n,k))


############################################################################
############################################################################
## Save output to file
############################################################################
############################################################################

	file.name<-paste("n_",i.n,"_k_",i.k,"_",round(runif(1)*1e10),sep="")
	save(summary.out,file=file.name)
	
	}}#close out n, p loops
